# in progress/BOGGLE/wordtest.R

# wordtest: work-in-progress scratchpad of regular-expression fragments that
# describe letter sequences at the beginning, middle and end of words, explored
# against a word list read into `dutch`. Takes no arguments.
#
# NOTE(review): as written this body does not parse. Inside the cbind( call
# opened below, several expressions follow one another without a separating
# comma (the sapply() call, the `dutch[...]` lookup, the bare string literals
# and the first paste() of the word-final section), so the function cannot be
# defined until that is repaired.
wordtest=function(){

# Letter classes, written as regex bracket expressions, from which the
# patterns below are assembled.
fricatives="[fgsvz]" # "ch", "c"
liquids="[lr]"
nasals="[mn]"
plosives="[bdptk]"
obstruents="[bdptkfgsvz]"
digraphs=c("ei","ui","oe","ch","ng","ie","au","ou")
vowels="[aiuoey]"
consonants="[bcdfghjklmnpqrstvwxz]"
voicedobstruents="[bdzv]"
voicelessobstruents="[ptks]"
highvowels="[ieu]"
lowvowels="[ao]"
frontconsonants="[bptdnmsz]"
# NOTE(review): "backconsonatns" looks like a misspelling of "backconsonants";
# the name is not referenced again inside this function.
backconsonatns="[gk]"
specials="[cwqxh]"

# Read the word list from a hard-coded absolute path into a character matrix.
# `dutch` is assigned inside this function, so it is local to it.
dutch=as.matrix(read.table("/Users/ling-jwe/Documents/Awks/dutchlist.txt",header=F))
# Entries of the word list that end in "fa" (regexpr() > 0 marks a match).
dutch[regexpr(paste("fa$",sep=""),dutch)>0]

# word-initial consonant combinations
# Each paste() glues an anchor and letter classes into a single regex string.
cbind(
paste("^",liquids,consonants,sep=""), # 
paste("^",nasals,consonants,sep=""),
paste("^","h",consonants,sep=""),
paste("^",plosives,plosives,sep=""),
paste("^",plosives,liquids,"w",sep=""),
paste("^",specials,obstruents,sep=""), # csardas
paste("^",voicedobstruents,voicelessobstruents,sep=""),
paste("^",voicelessobstruents,voicedobstruents,sep=""), # sven
paste("^",voicedobstruents,voicedobstruents,sep=""), # sven
paste("^",voicedobstruents,nasals,sep=""), # sven
paste("^t[nfgjlmsvxz]",sep=""),
paste("^v[bcdfghjkmnpqrstvwxyz]",sep=""),
paste("^b[bcdfghjkmnpqrstvwxyz]",sep=""),

# For each letter x: the number of entries that end in x followed by "b"
# (total number of entries minus the number of non-matches).
# NOTE(review): from here on the expressions inside cbind( are no longer
# separated by commas, which is a syntax error.
sapply(letters[1:26],function(x){length(dutch)-sum(regexpr(paste(x,"b$",sep=""),dutch)<0)})

# Entries of the word list that end in "c".
dutch[regexpr("c$",dutch)>0]

# word ends on "d"
# NOTE(review): the empty "[]" in the strings below look like placeholders that
# were still to be filled in - confirm before using them as patterns.
"[bcdhkptvxyz]d$"
"[]ad$"
"[]ed$"
"[]fd$"
"[]gd$"
"[]id$"
"[]jd$"
"[]ld$"
"[]md$"
"[]nd$"
"[]od$"
"[]rd$"
"[]sd$"
"[]ud$"
"[]wd$"

# word ends on "c"
"[bcdefghjklmnopqrstuvwxyz]c$"
"[abcdfghijklmnopqrstuvwxyz]ac$"
"[abcdefgijklmoqrsuvwxyz]ic$"

# word ends on "b"
# NOTE(review): only the first string of this group carries the "$" anchor.
"[bcdfghjklmnpqrstvwxyz]b$"
"[cdegijkmnopqstuvxyz]ab"
"[acdefjklpqstuvwxyz]eb"
"[abcdefghijkmnopqstvwxyz]ib"
"[acdefgijmpqsuvwxyz]ob"
"[abcefgijkmnopqrsuvwxyz]ub"

# Word ends on "a"
# NOTE(review): only the first three strings of this group carry the "$" anchor.

"[aouxfqhyxw]a$"
"[bcdefghijknpqstvwxyz]ba$"
"[bdfghjklmpqstvwxyz]ca$"
"[bcfhjkmpqstvwxyz]da"
"[aefgijmpqtuwxyz]ea"
"[bchjoqvwxyz]ga"
"[eijoquwxy]ia"
"[bcgmpqtvwxyz]ka"
"[cghkmqtwxz]la"
"[cdfhjknpqvwxyz]ma"
"[bcfklmqstvxyz]na"
"[bcdfghjknquvwxyz]pa"
"[jknqvwxz]ra"
"[bcdfghjmqrvwxyz]sa"
"[bdghjkmpqvwyz]ta"
"[abdefgijklmorstuvwxyz]ua"
"[bcdefghjkmnpqstvwxyz]va"
"[abcdefghjkmpqrstvwxy]za"






# word-final impossible consonant combinations
# NOTE(review): the first paste() below evaluates to "a$" and is not followed
# by a comma.
paste("","a$",sep="")
paste(plosives,liquids,"$",sep=""), # pjotr
paste(nasals,liquids,"$",sep=""), # kreml
paste(fricatives,liquids,"$",sep=""), # no occurrences
paste("[zv]","$",sep=""), # jazz, quiz
paste(liquids,liquids,"$",sep=""), # football, overall
paste("[knfchpqtvwxz]","[p]","$",sep=""),
paste("[kfhpqtvwxz]","[k]","$",sep=""),
paste(consonants,"[jxq]$",sep=""),
paste("[bcdfghjklmnpqrstvwxyz]","b$",sep=""),
paste(consonants,consonants,nasals,"$",sep=""),
paste(consonants,consonants,liquids,"$",sep=""),
paste(plosives,plosives,plosives,"$",sep=""),
# NOTE(review): the next pattern has no "$" anchor although it sits in the
# word-final section; the same expression is repeated in the word-medial
# section below.
paste(liquids,"[cjbhlqrvwxz]",nasals,sep=""),
paste("[fsvzbdptkcqxh]",nasals,"$",sep=""),

# word-medial impossible consonant combinations
paste(obstruents,liquids,nasals,sep=""), # no exceptions
paste(obstruents,liquids,liquids,sep=""), # no exceptions
paste(nasals,nasals,obstruents,sep=""), # no real exceptions
paste(obstruents,nasals,nasals,sep=""), # no real exceptions
paste("[vz]","[tkp]",sep=""), # azteken
paste(liquids,"[cjbhlqrvwxz]",nasals,sep=""),

# word-initial impossible vowel combinations
paste("^","[ue]a",sep=""),
paste("^","[u]o",sep=""),

# word-final impossible vowel combinations
# NOTE(review): this pattern has no "$" anchor although the heading says
# word-final.
paste(consonants,"[yoncdfghijklmpqrstvwxyz]i",sep=""), # tolstoi

# word-medial impossible combinations
paste(consonants,"i","i",sep=""))


}

# Count, for every ordered pair of lowercase letters, how many words end in
# that pair.
#
# Arguments:
#   words  Character vector or matrix of words to search. Defaults to `dutch`,
#          which is looked up when the function is called, so calling find()
#          without arguments keeps working as before.
#
# Returns a 26 x 26 integer matrix with letters as row and column names.
# Element [letter1, letter0] is the number of entries of `words` that end in
# letter1 immediately followed by letter0 (rows: second-to-last letter,
# columns: last letter). Missing values in `words` count as non-matches.
#
# NOTE: while this definition is loaded it masks utils::find().
find <- function(words = dutch) {
  vapply(letters, function(letter0) {
    vapply(letters, function(letter1) {
      sum(grepl(paste0(letter1, letter0, "$"), words))
    }, integer(1))
  }, integer(26))
}

# # # 

# Extract all combinations of two letters at word onsets and offsets, extend
# the attested ones to three letters, and write the letter sequences that never
# occur to "myresults.txt" as regular expressions.
#
# Relies on `dutch`, the word list, already existing in the workspace.

# Count the entries of `dutch` that match each regular expression in
# `patterns`; the counts are returned as an integer vector named by `labels`.
count_matches <- function(patterns, labels) {
  counts <- vapply(
    patterns,
    function(pattern) sum(regexpr(pattern, dutch) > 0),
    integer(1),
    USE.NAMES = FALSE
  )
  names(counts) <- labels
  counts
}

# All 676 two-letter strings: rows vary the first letter, columns the second.
lettercombinations <- outer(letters, letters, paste0)
pairs <- as.vector(lettercombinations)

# Number of words that start (onsets2) or end (offsets2) with each pair.
onsets2 <- count_matches(paste0("^", pairs), pairs)
offsets2 <- count_matches(paste0(pairs, "$"), pairs)

# Per first letter: the second letters that never follow it at a word onset.
onsets2.res <- vapply(letters, function(x) {
  unattested <- pairs[which(onsets2 == 0 & substr(pairs, 1, 1) == x)]
  paste(substr(unattested, 2, 2), collapse = "")
}, character(1))

# Per last letter: the letters that never precede it at a word offset.
offsets2.res <- vapply(letters, function(x) {
  unattested <- pairs[which(offsets2 == 0 & substr(pairs, 2, 2) == x)]
  paste(substr(unattested, 1, 1), collapse = "")
}, character(1))

# Three-letter offsets: every letter placed in front of every attested
# two-letter offset. outer() pairs each letter with each offset; pasting the
# whole vector of names inside the callback (as an earlier version did)
# combines them by recycling instead and does not give the full product.
attested_offsets2 <- pairs[which(offsets2 > 0)]
lettercombinations <- outer(letters, attested_offsets2, paste0)
triples <- as.vector(lettercombinations)
offsets3 <- count_matches(paste0(triples, "$"), triples)

# Three-letter onsets: every letter appended to every attested two-letter
# onset, so that each onset pair is tested with all 26 possible third letters
# (onsets3.res below groups by the first two letters and reports the third).
attested_onsets2 <- pairs[which(onsets2 > 0)]
lettercombinations <- outer(attested_onsets2, letters, paste0)
triples <- as.vector(lettercombinations)
onsets3 <- count_matches(paste0("^", triples), triples)

# Per two-letter onset that occurs more than once: third letters never seen
# after it.
onsets3.res <- vapply(pairs[which(onsets2 > 1)], function(x) {
  candidates <- names(onsets3)
  unattested <- candidates[which(onsets3 == 0 & substr(candidates, 1, 2) == x)]
  paste(substr(unattested, 3, 3), collapse = "")
}, character(1))

# Per two-letter offset that occurs more than once: letters never seen in
# front of it.
offsets3.res <- vapply(pairs[which(offsets2 > 1)], function(x) {
  candidates <- names(offsets3)
  unattested <- candidates[which(offsets3 == 0 & substr(candidates, 2, 3) == x)]
  paste(substr(unattested, 1, 1), collapse = "")
}, character(1))

# Write one regex per line: "^<onset>[<impossible next letters>]" followed by
# "[<impossible preceding letters>]<offset>$".
# NOTE: an onset/offset without any impossible letters yields an empty bracket
# expression "[]" in the output.
onset_restrictions <- c(onsets2.res, onsets3.res)
offset_restrictions <- c(offsets2.res, offsets3.res)
write.table(
  c(
    paste0("^", names(onset_restrictions), "[", onset_restrictions, "]"),
    paste0("[", offset_restrictions, "]", names(offset_restrictions), "$")
  ),
  file = "myresults.txt", row.names = FALSE, col.names = FALSE
)

# Inspect the first 20 three-letter onset restrictions.
onsets3.res[1:20]

# Leftover fragments of an earlier draft, kept for reference only. They refer
# to an object `wordoffsets` that is not created in this file, and the first
# one has unbalanced closing brackets, so they are commented out:
#   paste(substr(wordoffsets[wordoffsets<2&substr(wordoffsets,2,2)==x],1,1),collapse="")})
#   paste(wordoffsets[wordoffsets<1])

# All two-letter combinations as a 26 x 26 character matrix. outer() needs a
# function as its third argument, not an already evaluated paste() call.
outer(letters, letters, paste0)

# find.matrix[letter1, letter0] is the number of entries of `dutch` that end in
# letter1 followed by letter0. It is computed by find(), defined earlier in
# this file, and must exist before the statements below use it.
find.matrix <- find()

# One regex per final letter: "[<letters that precede it fewer than 2 times>]",
# then the final letter and "$". The pieces are joined without separators so
# that no spaces end up inside the pattern.
vapply(seq_along(letters), function(x) {
  rare_before <- paste(letters[which(find.matrix[, x] < 2)], collapse = "")
  paste0("[", rare_before, "]", letters[x], "$")
}, character(1)) # increase 2 for smaller restriction

# For every final letter (outer list) and preceding letter (inner list): the
# two letters pasted together when that ending occurs more than once, NULL
# otherwise.
lapply(letters, function(letter0) {
  lapply(letters, function(letter1) {
    if (find.matrix[letter1, letter0] > 1) {
      paste(letter1, letter0)
    }
  })
})

# Leftover fragment with unbalanced closing brackets, kept for reference only:
#   if(find.matrix[letter1,letter0]>1){paste(letter1,letter0,collapse="")}})})

length(letters)

# All 676 two-letter strings in dictionary order ("aa", "ab", ..., "zz"): the
# second letter varies fastest.
digraphs <- as.vector(
  outer(letters, letters, function(second, first) paste0(first, second))
)
# Number of entries of `dutch` containing each digraph anywhere in the word.
digraphs.freqs <- vapply(
  digraphs,
  function(digraph) sum(regexpr(digraph, dutch) > 0),
  integer(1),
  USE.NAMES = FALSE
)

# All 17576 three-letter strings in dictionary order ("aaa", "aab", ...,
# "zzz"): each digraph extended by every possible last letter.
trigraphs <- as.vector(
  outer(letters, digraphs, function(last, lead) paste0(lead, last))
)
# Number of entries of `dutch` containing each trigraph anywhere in the word.
trigraphs.freqs <- vapply(
  trigraphs,
  function(trigraph) sum(regexpr(trigraph, dutch) > 0),
  integer(1),
  USE.NAMES = FALSE
)
# vdweijer/games documentation built on Dec. 23, 2021, 3:02 p.m.